


# Setup ------------------------------------------------------------------------
  # Options
  options(stringsAsFactors = FALSE)
  # Packages
  library(readstata13)
  library(magrittr)
  library(data.table)
  #library(stringr)
  #library(lubridate)
  library(lfe)
  library(foreach)
  library(doParallel)
  # Set the number of threads for the lfe package
  options(lfe.threads = 8)
  # Directories of joined bill-weather files
  dir_data       <- "S:/Edward/JoinedBillWeather/"
  dir_data_pge   <- paste0(dir_data, "PGE/")
  dir_data_socal <- paste0(dir_data, "SoCal/")
  dir_data_sdge  <- paste0(dir_data, "SDGE/")
  dir_elasticity <- "S:/Edward/Elasticity/"
  dir_csv        <- paste0(dir_elasticity, "DataCsv/")


# Find zip codes shared by utilities -------------------------------------------
  # The files
  pge_files <- dir_data_pge %>% dir()
  socal_files <- dir_data_socal %>% dir()
  sdge_files <- dir_data_sdge %>% dir()
  # The zip codes
  pge_zips <- gsub("[^0-9]", "", pge_files)
  socal_zips <- gsub("[^0-9]", "", socal_files)
  sdge_zips <- gsub("[^0-9]", "", sdge_files)
  # Intersections between utilities
  pge_socal <- intersect(pge_zips, socal_zips)
  pge_sdge <- intersect(pge_zips, sdge_zips)
  socal_sdge <- intersect(socal_zips, sdge_zips)
  all3 <- intersect(pge_socal, sdge_zips)
  # The files for zip codes in the intersections
  pge_files <- paste0(dir_data_pge, "joined_pge_", pge_socal, ".dta")
  socal_files <- paste0(dir_data_socal, "joined_socal_", pge_socal, ".dta")
  rm(sdge_files)
  # Save the 5-digit-zip-code interesections as a CSV
  write.csv(x = data.frame(zip = pge_socal),
    file = paste0(dir_csv, "commonZipsPgeSocal.csv"),
    row.names = F)


# Open files -------------------------------------------------------------------
  # NOTE: There is almost no intersection at the 9-digit level
  ## Count obsevations/households in the same 9-digit zip code across utilities
  #int_dt <- lapply(X = seq(1, length(pge_socal)), FUN = function(i) {
  #  # Open the PGE and SoCal files
  #  pge_dt   <- read.dta13(pge_files[i]) %>% data.table()
  #  socal_dt <- read.dta13(socal_files[i]) %>% data.table()
  #  # Find the intersection of the 4-digit zip suffixes
  #  zip4_int <- intersect(unique(pge_dt$zip_plus4), unique(socal_dt$zip_plus4))
  #  # Find the number of observations and households in the shared 9-digit zips
  #  zip_dt <- data.table(
  #    zip     = pge_socal[i],
  #    pge_n   = pge_dt[zip_plus4 %in% zip4_int] %>% nrow(),
  #    socal_n = socal_dt[zip_plus4 %in% zip4_int] %>% nrow(),
  #    pge_hh   = pge_dt[zip_plus4 %in% zip4_int]$hh_id %>%
  #      unique() %>% length(),
  #    socal_hh = socal_dt[zip_plus4 %in% zip4_int]$hh_id %>%
  #      unique() %>% length()
  #    )
  #  # Return the info
  #  return(zip_dt)
  #}) %>% rbindlist()
  # Count obsevations/households in the same 5-digit zip code across utilities
  same_dt <- lapply(X = seq_along(pge_socal), FUN = function(i) {
    # Open the PGE and SoCal files
    pge_dt   <- read.dta13(pge_files[i]) %>% data.table()
    socal_dt <- read.dta13(socal_files[i]) %>% data.table()
    # Find the number of observations and households in the shared 9-digit zips
    zip_dt <- data.table(
      zip     = pge_socal[i],
      pge_n   = pge_dt %>% nrow(),
      socal_n = socal_dt %>% nrow(),
      pge_hh   = pge_dt$hh_id %>% unique() %>% length(),
      socal_hh = socal_dt$hh_id %>% unique() %>% length()
      )
    # Return the info
    return(zip_dt)
  }) %>% rbindlist()
  # Save the counts
  write.csv(x = same_dt,
    file = paste0(dir_csv, "commonZipsCountsPgeSocal.csv"),
    row.names = F)
